Call Libraries
library(tidyverse)
library(caret)
library(MASS)
library(car)
library(moments)
Calling the Transformed Datasets
# Load the cleaned income-group credit data (path is relative to the working directory).
income_cleaned <- read_csv(file = "NYS_Corp_Tax_Credit_data/income_cleaned.csv")
Rows: 1921 Columns: 6── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Name, Group
dbl (4): Year, Num, Amount, Avg
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the income-group data for a quick visual check.
income_cleaned
# Load the cleaned industry-group credit data (path is relative to the working directory).
industry_cleaned <- read_csv(file = "NYS_Corp_Tax_Credit_data/industry_cleaned.csv")
Rows: 2476 Columns: 6── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Name, Group
dbl (4): Year, Num, Amount, Avg
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the industry-group data for a quick visual check.
industry_cleaned
Creating the Models
# Fit a linear model and print/plot a standard set of regression diagnostics.
#
# Args:
#   df:          data frame holding the response and every predictor.
#   field:       name (string) of the response column in `df`.
#   sat.formula: model formula handed to lm().
#   plot.title:  title of the response histogram. Defaults to the title that
#                was previously hard-coded, so existing calls are unchanged;
#                pass a different title when `field` is a transformed response.
#
# Side effects: prints the Shapiro-Wilk test, the kurtosis, the model summary,
# the response histogram and the (G)VIF table, and draws the lm() diagnostic
# plots and the influence plot.
# Returns: the value of influencePlot() (last expression).
sat.model.summary <- function(df, field, sat.formula,
                              plot.title = 'Average Credit Amount Distribution') {
  # Shapiro-Wilk test to evaluate normality of the response.
  # The argument is kept as `df[[field]]` because shapiro.test() prints the
  # deparsed argument as its "data:" label.
  print(shapiro.test(df[[field]]))

  # Kurtosis evaluation (normal distribution has a value close to 3).
  # NOTE(review): "close to 3" holds for Pearson kurtosis; confirm which
  # package's kurtosis() is actually on top of the search path.
  print('kurtosis')
  print(kurtosis(df[[field]]))

  linear.model.cleaned <- lm(sat.formula, data = df)
  print(summary(linear.model.cleaned))
  plot(linear.model.cleaned)

  # Histogram of the response variable to check its distribution.
  # aes_string() is deprecated in ggplot2; the .data pronoun looks the column
  # up by name instead. The x label is set explicitly so the axis still shows
  # the column name.
  print(df %>%
    ggplot(aes(x = .data[[field]])) +
    geom_histogram() +
    labs(title = plot.title, x = field) +
    theme(plot.title = element_text(hjust = 0.5)))

  # Checking multicollinearity using VIF measurement
  print(vif(linear.model.cleaned))
  influencePlot(linear.model.cleaned)
  #avPlots(linear.model.cleaned)
}
# Saturated model specification: response is Avg, predictors are every other
# column except Amount.
sat.field <- "Avg"
sat.formula <- Avg ~ . - Amount
# Diagnostics for the income-group dataset.
sat.model.summary(df = income_cleaned, field = sat.field, sat.formula = sat.formula)
Shapiro-Wilk normality test
data: df[[field]]
W = 0.17297, p-value < 2.2e-16
[1] "kurtosis"
[1] 169.7518
Call:
lm(formula = sat.formula, data = df)
Residuals:
Min 1Q Median 3Q Max
-11338570 -547702 94139 421302 87181761
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.157e+08 5.512e+07 -2.100 0.03590
Year 5.721e+04 2.731e+04 2.095 0.03634
NameAlternative Fuels and Electric Vehicle Recharging Property Credit -3.287e+05 1.242e+06 -0.265 0.79140
NameAlternative Minimum Tax Credit 6.538e+05 9.757e+05 0.670 0.50286
NameBeer Production Credit 3.316e+05 1.290e+06 0.257 0.79715
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 1.429e+06 9.960e+05 1.434 0.15162
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 4.098e+06 1.421e+06 2.883 0.00398
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 1.182e+06 9.927e+05 1.191 0.23386
NameBrownfield Tax Credits - Remediation Real Property Tax Credit -7.587e+04 9.920e+05 -0.076 0.93904
NameClean Heating Fuel Credit 7.148e+04 1.024e+06 0.070 0.94436
NameConservation Easement Tax Credit 1.143e+05 1.064e+06 0.107 0.91444
NameCredit for Employment of Persons with Disabilities -9.343e+05 1.119e+06 -0.835 0.40391
NameCredit for Purchase of an Automated External Defibrillator -1.449e+05 9.695e+05 -0.150 0.88117
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities 2.316e+05 1.779e+06 0.130 0.89645
NameEmpire State Apprentice Tax Credit -7.983e+05 1.524e+06 -0.524 0.60040
NameEmpire State Commercial Production Credit 2.183e+05 1.291e+06 0.169 0.86576
NameEmpire State Film Post Production Credit 2.521e+04 1.049e+06 0.024 0.98082
NameEmpire State Film Production Credit 1.145e+07 9.967e+05 11.485 < 2e-16
NameEmpire State Musical and Theatrical Production Credit -2.342e+05 1.775e+06 -0.132 0.89507
NameExcelsior Jobs Program Credit 1.035e+05 9.545e+05 0.108 0.91366
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit 2.752e+06 8.862e+05 3.105 0.00193
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit 4.845e+05 9.213e+05 0.526 0.59899
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes 1.245e+06 9.239e+05 1.348 0.17785
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners -1.025e+05 9.549e+05 -0.107 0.91455
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit -6.145e+04 9.387e+05 -0.065 0.94781
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners 9.119e+04 1.044e+06 0.087 0.93038
NameFarm Workforce Retention Credit -7.802e+03 1.285e+06 -0.006 0.99516
NameFarmers' School Tax Credit 1.440e+05 1.021e+06 0.141 0.88786
NameHire a Veteran Credit -1.238e+06 1.782e+06 -0.695 0.48721
NameHistoric Properties Rehabilitation Credit 1.352e+06 1.055e+06 1.282 0.20015
NameIndustrial or Manufacturing Business Tax Credit 4.997e+05 1.074e+06 0.465 0.64173
NameInvestment Tax Credit 4.463e+05 8.895e+05 0.502 0.61588
NameInvestment Tax Credit for the Financial Services Industry 3.976e+05 1.008e+06 0.395 0.69325
NameLife Sciences Research & Development Tax Credit -8.207e+04 2.099e+06 -0.039 0.96882
NameLong-Term Care Insurance Credit 1.246e+05 9.808e+05 0.127 0.89892
NameLow-Income Housing Credit -2.764e+04 1.113e+06 -0.025 0.98020
NameMinimum Wage Reimbursement Credit -2.931e+05 1.006e+06 -0.291 0.77075
NameMortgage Servicing Tax Credit -4.540e+05 1.085e+06 -0.418 0.67565
NameNew York Youth Jobs Program Tax Credit -2.587e+05 9.381e+05 -0.276 0.78279
NameQETC Capital Tax Credit 2.616e+05 1.386e+06 0.189 0.85032
NameQETC Employment Credit 1.335e+05 1.026e+06 0.130 0.89641
NameQETC Facilities, Operations, and Training Credit 5.153e+05 1.918e+06 0.269 0.78820
NameReal Property Tax Relief Credit for Manufacturing -2.635e+05 9.654e+05 -0.273 0.78490
NameSpecial Additional Mortgage Recording Tax Credit 3.678e+04 9.244e+05 0.040 0.96827
NameSTART-UP NY Tax Elimination Credit 4.061e+03 1.130e+06 0.004 0.99713
Group1,000,000 - 24,999,999 2.170e+05 3.501e+05 0.620 0.53548
Group100,000 - 499,999 1.352e+05 3.579e+05 0.378 0.70572
Group100,000,000 - 499,999,999 9.644e+05 3.937e+05 2.449 0.01441
Group25,000,000 - 49,999,999 4.519e+05 4.223e+05 1.070 0.28474
Group50,000,000 - 99,999,999 4.021e+05 4.235e+05 0.950 0.34247
Group500,000 - 999,999 2.195e+05 3.880e+05 0.566 0.57170
Group500,000,000 - and over 3.073e+06 3.846e+05 7.991 2.33e-15
GroupZero or Net Loss 9.536e+05 3.339e+05 2.856 0.00433
Num -4.520e+02 8.569e+02 -0.527 0.59791
(Intercept) *
Year *
NameAlternative Fuels and Electric Vehicle Recharging Property Credit
NameAlternative Minimum Tax Credit
NameBeer Production Credit
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 **
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08
NameBrownfield Tax Credits - Remediation Real Property Tax Credit
NameClean Heating Fuel Credit
NameConservation Easement Tax Credit
NameCredit for Employment of Persons with Disabilities
NameCredit for Purchase of an Automated External Defibrillator
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities
NameEmpire State Apprentice Tax Credit
NameEmpire State Commercial Production Credit
NameEmpire State Film Post Production Credit
NameEmpire State Film Production Credit ***
NameEmpire State Musical and Theatrical Production Credit
NameExcelsior Jobs Program Credit
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit **
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners
NameFarm Workforce Retention Credit
NameFarmers' School Tax Credit
NameHire a Veteran Credit
NameHistoric Properties Rehabilitation Credit
NameIndustrial or Manufacturing Business Tax Credit
NameInvestment Tax Credit
NameInvestment Tax Credit for the Financial Services Industry
NameLife Sciences Research & Development Tax Credit
NameLong-Term Care Insurance Credit
NameLow-Income Housing Credit
NameMinimum Wage Reimbursement Credit
NameMortgage Servicing Tax Credit
NameNew York Youth Jobs Program Tax Credit
NameQETC Capital Tax Credit
NameQETC Employment Credit
NameQETC Facilities, Operations, and Training Credit
NameReal Property Tax Relief Credit for Manufacturing
NameSpecial Additional Mortgage Recording Tax Credit
NameSTART-UP NY Tax Elimination Credit
Group1,000,000 - 24,999,999
Group100,000 - 499,999
Group100,000,000 - 499,999,999 *
Group25,000,000 - 49,999,999
Group50,000,000 - 99,999,999
Group500,000 - 999,999
Group500,000,000 - and over ***
GroupZero or Net Loss **
Num
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3868000 on 1867 degrees of freedom
Multiple R-squared: 0.2361, Adjusted R-squared: 0.2144
F-statistic: 10.88 on 53 and 1867 DF, p-value: < 2.2e-16
GVIF Df GVIF^(1/(2*Df))
Year 2.125869 1 1.458036
Name 3.521453 43 1.014746
Group 1.510763 8 1.026124
Num 1.645484 1 1.282764
# Keep the fitted income-group model for the Box-Cox search and BIC comparison below.
income.model <- lm(formula = sat.formula, data = income_cleaned)
# Diagnostics for the industry-group dataset.
sat.model.summary(df = industry_cleaned, field = sat.field, sat.formula = sat.formula)
Shapiro-Wilk normality test
data: df[[field]]
W = 0.22287, p-value < 2.2e-16
[1] "kurtosis"
[1] 135.5482
Call:
lm(formula = sat.formula, data = df)
Residuals:
Min 1Q Median 3Q Max
-11581741 -371031 -23164 154182 28917959
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.926e+07 2.051e+07 -1.914 0.055677
Year 1.936e+04 1.016e+04 1.905 0.056959
NameAlternative Fuels and Electric Vehicle Recharging Property Credit 5.584e+04 5.062e+05 0.110 0.912180
NameAlternative Minimum Tax Credit 3.523e+05 4.208e+05 0.837 0.402556
NameBeer Production Credit 8.799e+04 6.261e+05 0.141 0.888249
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 1.919e+06 4.473e+05 4.290 1.86e-05
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 4.064e+06 7.278e+05 5.584 2.62e-08
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 1.054e+06 4.779e+05 2.206 0.027462
NameBrownfield Tax Credits - Remediation Real Property Tax Credit 1.327e+05 4.760e+05 0.279 0.780429
NameClean Heating Fuel Credit 2.785e+05 4.595e+05 0.606 0.544549
NameConservation Easement Tax Credit 2.743e+04 4.700e+05 0.058 0.953457
NameCredit for Employment of Persons with Disabilities 1.574e+05 5.069e+05 0.311 0.756142
NameCredit for Purchase of an Automated External Defibrillator 1.082e+05 4.465e+05 0.242 0.808639
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities 2.090e+05 8.286e+05 0.252 0.800866
NameEmpire State Apprentice Tax Credit -3.324e+05 1.012e+06 -0.329 0.742537
NameEmpire State Commercial Production Credit 3.524e+05 6.172e+05 0.571 0.568123
NameEmpire State Film Post Production Credit 5.332e+05 5.224e+05 1.021 0.307585
NameEmpire State Film Production Credit 1.170e+07 4.785e+05 24.458 < 2e-16
NameEmpire State Musical and Theatrical Production Credit 7.676e+04 9.010e+05 0.085 0.932112
NameExcelsior Jobs Program Credit 7.086e+05 4.393e+05 1.613 0.106849
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit 1.381e+06 4.318e+05 3.199 0.001399
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit 3.715e+05 4.213e+05 0.882 0.377942
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes 1.160e+06 4.192e+05 2.767 0.005700
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners 3.634e+05 4.311e+05 0.843 0.399359
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit 2.567e+05 4.265e+05 0.602 0.547350
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners 7.379e+04 5.041e+05 0.146 0.883634
NameFarm Workforce Retention Credit 2.727e+04 5.352e+05 0.051 0.959361
NameFarmers' School Tax Credit 1.287e+05 5.133e+05 0.251 0.802102
NameHire a Veteran Credit 1.459e+05 8.253e+05 0.177 0.859709
NameHistoric Properties Rehabilitation Credit 1.875e+06 4.841e+05 3.874 0.000110
NameInvestment Tax Credit 7.182e+05 4.129e+05 1.739 0.082075
NameInvestment Tax Credit for the Financial Services Industry 6.339e+05 5.757e+05 1.101 0.270913
NameLife Sciences Research & Development Tax Credit -2.243e+03 9.007e+05 -0.002 0.998014
NameLong-Term Care Insurance Credit 1.385e+05 4.200e+05 0.330 0.741537
NameLow-Income Housing Credit 1.642e+06 5.494e+05 2.988 0.002833
NameMinimum Wage Reimbursement Credit 9.624e+04 4.381e+05 0.220 0.826159
NameMortgage Servicing Tax Credit 2.077e+05 6.462e+05 0.321 0.747934
NameNew York Youth Jobs Program Tax Credit 1.953e+05 4.273e+05 0.457 0.647604
NameQETC Capital Tax Credit 2.986e+05 5.868e+05 0.509 0.610823
NameQETC Employment Credit 8.982e+04 4.465e+05 0.201 0.840607
NameQETC Facilities, Operations, and Training Credit 3.477e+05 6.711e+05 0.518 0.604441
NameReal Property Tax Relief Credit for Manufacturing 1.520e+05 4.423e+05 0.344 0.731171
NameSpecial Additional Mortgage Recording Tax Credit 2.609e+05 4.432e+05 0.589 0.556155
NameSTART-UP NY Tax Elimination Credit 1.703e+04 4.750e+05 0.036 0.971411
GroupAdministrative and Support and Waste Management and Remediation Services -2.280e+03 2.519e+05 -0.009 0.992777
GroupAdministrative/Support/Waste Management/Remediation Services -3.173e+03 2.798e+05 -0.011 0.990952
GroupAgriculture, Forestry, Fishing and Hunting 7.269e+04 2.330e+05 0.312 0.755097
GroupArts, Entertainment, and Recreation 5.866e+05 2.317e+05 2.532 0.011408
GroupConstruction -1.699e+04 2.171e+05 -0.078 0.937621
GroupEducational Services 5.569e+04 2.948e+05 0.189 0.850176
GroupFinance and Insurance 2.139e+05 2.034e+05 1.051 0.293237
GroupHealth Care and Social Assistance 1.300e+04 2.290e+05 0.057 0.954731
GroupInformation -2.433e+05 2.178e+05 -1.117 0.263917
GroupManagement of Companies and Enterprises 3.355e+05 1.949e+05 1.721 0.085332
GroupManufacturing 6.786e+05 2.028e+05 3.346 0.000831
GroupMining -1.561e+04 3.593e+05 -0.043 0.965352
GroupMining, Quarrying, and Oil and Gas Extraction 1.084e+05 3.011e+05 0.360 0.718901
GroupOther Services (except Public Administration) -6.462e+04 2.217e+05 -0.291 0.770697
GroupProfessional, Scientific, and Technical Services 3.736e+05 2.085e+05 1.792 0.073214
GroupReal Estate and Rental and Leasing 9.506e+04 2.044e+05 0.465 0.641882
GroupRetail Trade 4.746e+04 2.036e+05 0.233 0.815756
GroupTransportation and Warehousing -1.866e+04 2.300e+05 -0.081 0.935321
GroupUtilities 5.308e+05 2.633e+05 2.016 0.043917
GroupWholesale Trade 6.106e+04 2.081e+05 0.293 0.769261
Num -9.657e+02 4.272e+02 -2.260 0.023889
(Intercept) .
Year .
NameAlternative Fuels and Electric Vehicle Recharging Property Credit
NameAlternative Minimum Tax Credit
NameBeer Production Credit
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 *
NameBrownfield Tax Credits - Remediation Real Property Tax Credit
NameClean Heating Fuel Credit
NameConservation Easement Tax Credit
NameCredit for Employment of Persons with Disabilities
NameCredit for Purchase of an Automated External Defibrillator
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities
NameEmpire State Apprentice Tax Credit
NameEmpire State Commercial Production Credit
NameEmpire State Film Post Production Credit
NameEmpire State Film Production Credit ***
NameEmpire State Musical and Theatrical Production Credit
NameExcelsior Jobs Program Credit
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit **
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes **
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners
NameFarm Workforce Retention Credit
NameFarmers' School Tax Credit
NameHire a Veteran Credit
NameHistoric Properties Rehabilitation Credit ***
NameInvestment Tax Credit .
NameInvestment Tax Credit for the Financial Services Industry
NameLife Sciences Research & Development Tax Credit
NameLong-Term Care Insurance Credit
NameLow-Income Housing Credit **
NameMinimum Wage Reimbursement Credit
NameMortgage Servicing Tax Credit
NameNew York Youth Jobs Program Tax Credit
NameQETC Capital Tax Credit
NameQETC Employment Credit
NameQETC Facilities, Operations, and Training Credit
NameReal Property Tax Relief Credit for Manufacturing
NameSpecial Additional Mortgage Recording Tax Credit
NameSTART-UP NY Tax Elimination Credit
GroupAdministrative and Support and Waste Management and Remediation Services
GroupAdministrative/Support/Waste Management/Remediation Services
GroupAgriculture, Forestry, Fishing and Hunting
GroupArts, Entertainment, and Recreation *
GroupConstruction
GroupEducational Services
GroupFinance and Insurance
GroupHealth Care and Social Assistance
GroupInformation
GroupManagement of Companies and Enterprises .
GroupManufacturing ***
GroupMining
GroupMining, Quarrying, and Oil and Gas Extraction
GroupOther Services (except Public Administration)
GroupProfessional, Scientific, and Technical Services .
GroupReal Estate and Rental and Leasing
GroupRetail Trade
GroupTransportation and Warehousing
GroupUtilities *
GroupWholesale Trade
Num *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1612000 on 2411 degrees of freedom
Multiple R-squared: 0.4674, Adjusted R-squared: 0.4533
F-statistic: 33.06 on 64 and 2411 DF, p-value: < 2.2e-16
GVIF Df GVIF^(1/(2*Df))
Year 2.190806 1 1.480137
Name 5.185764 42 1.019787
Group 2.724217 20 1.025371
Num 1.370920 1 1.170863
# Keep the fitted industry-group model for the Box-Cox search and BIC comparison below.
industry.model <- lm(formula = sat.formula, data = industry_cleaned)
Correcting the violation of normality in the previous models with a Box-Cox transform
# Run boxCox() on a fitted model and return the lambda with the highest
# value of the returned `y` component.
#
# Args:
#   lm.cleaned:   fitted model passed to boxCox().
#   lambda.range: lambda values passed to boxCox() as `lambda`.
#
# Returns: a single lambda value (the first one if several tie for the maximum).
bc_func <- function(lm.cleaned, lambda.range) {
  bc <- boxCox(lm.cleaned, lambda = lambda.range)
  # Extracting the best lambda value. which.max() always yields exactly one
  # index, whereas `which(y == max(y))` returns every tied position and would
  # hand a vector of lambdas to downstream code expecting a scalar.
  bc$x[which.max(bc$y)]
}
#Income Group Dataset
# Search lambda over [-0.2, 0.2] in steps of 0.1, then show the selected value.
income.lambda.bc <- bc_func(income.model, seq(from = -0.2, to = 0.2, by = 1/10))
income.lambda.bc
[1] -0.01414141
#Industry Group Dataset
# Search lambda over [-0.2, 0.2] in steps of 0.1, then show the selected value.
industry.lambda.bc <- bc_func(industry.model, seq(from = -0.2, to = 0.2, by = 1/10))
industry.lambda.bc
[1] -0.03434343
# Replace the raw response with its Box-Cox transform.
#
# Args:
#   df:        data frame containing the columns `Avg` and `Amount`.
#   lambda.bc: Box-Cox lambda.
#
# Returns: `df` without `Avg` and `Amount`, plus a column `Avg.bc` holding
# (Avg^lambda - 1) / lambda, or log(Avg) when lambda is (numerically) zero.
#
# Written with base subsetting on purpose: MASS is attached after the
# tidyverse in this file, so an unqualified select() resolves to MASS::select
# rather than dplyr::select.
bc_transform <- function(df, lambda.bc) {
  missing_cols <- setdiff(c("Avg", "Amount"), names(df))
  if (length(missing_cols) > 0) {
    stop("bc_transform(): missing column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  avg <- df[["Avg"]]
  # The power formula divides by lambda, so lambda == 0 must use its limit,
  # the natural log. isTRUE() keeps this a scalar decision.
  lambda_is_zero <- isTRUE(abs(lambda.bc) < sqrt(.Machine$double.eps))
  avg_bc <- if (lambda_is_zero) {
    log(avg)
  } else {
    (avg^lambda.bc - 1) / lambda.bc
  }

  # List-style `[` keeps the input's class (data.frame or tibble) and order.
  out <- df[setdiff(names(df), c("Avg", "Amount"))]
  out[["Avg.bc"]] <- avg_bc
  out
}
#Income Group Dataset
# Transform the response with the selected lambda, then refit on all remaining columns.
income_cleaned_bc <- bc_transform(df = income_cleaned, lambda.bc = income.lambda.bc)
income.model.bc <- lm(formula = Avg.bc ~ ., data = income_cleaned_bc)
#Industry Group Dataset
industry_cleaned_bc <- bc_transform(df = industry_cleaned, lambda.bc = industry.lambda.bc)
industry.model.bc <- lm(formula = Avg.bc ~ ., data = industry_cleaned_bc)
Checking linear regression assumptions for the transformed data.
# Model specification for the transformed data: Avg.bc against every other column.
sat.field.bc <- "Avg.bc"
sat.formula.bc <- Avg.bc ~ .
#Income
sat.model.summary(df = income_cleaned_bc, field = sat.field.bc, sat.formula = sat.formula.bc)
Shapiro-Wilk normality test
data: df[[field]]
W = 0.99696, p-value = 0.000782
[1] "kurtosis"
[1] -0.2553313
Call:
lm(formula = sat.formula, data = df)
Residuals:
Min 1Q Median 3Q Max
-6.6597 -0.5738 -0.0113 0.5668 4.4826
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.634e+01 1.456e+01 -2.496 0.012643
Year 2.288e-02 7.214e-03 3.171 0.001543
NameAlternative Fuels and Electric Vehicle Recharging Property Credit -8.643e-01 3.281e-01 -2.634 0.008510
NameAlternative Minimum Tax Credit -2.052e+00 2.577e-01 -7.962 2.91e-15
NameBeer Production Credit 5.687e-01 3.407e-01 1.669 0.095234
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 1.715e+00 2.630e-01 6.521 8.99e-11
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 2.676e+00 3.754e-01 7.130 1.43e-12
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 1.331e+00 2.622e-01 5.076 4.24e-07
NameBrownfield Tax Credits - Remediation Real Property Tax Credit 5.458e-02 2.620e-01 0.208 0.834991
NameClean Heating Fuel Credit -3.086e+00 2.705e-01 -11.409 < 2e-16
NameConservation Easement Tax Credit -2.137e+00 2.810e-01 -7.606 4.45e-14
NameCredit for Employment of Persons with Disabilities -3.118e+00 2.956e-01 -10.547 < 2e-16
NameCredit for Purchase of an Automated External Defibrillator -2.938e+00 2.560e-01 -11.473 < 2e-16
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities -5.673e-01 4.698e-01 -1.207 0.227430
NameEmpire State Apprentice Tax Credit -2.207e+00 4.024e-01 -5.485 4.71e-08
NameEmpire State Commercial Production Credit 6.075e-02 3.410e-01 0.178 0.858634
NameEmpire State Film Post Production Credit 1.101e+00 2.769e-01 3.977 7.25e-05
NameEmpire State Film Production Credit 2.853e+00 2.632e-01 10.838 < 2e-16
NameEmpire State Musical and Theatrical Production Credit 2.508e-01 4.688e-01 0.535 0.592803
NameExcelsior Jobs Program Credit 4.651e-01 2.521e-01 1.845 0.065178
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit 8.876e-01 2.341e-01 3.792 0.000154
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit 1.775e-01 2.433e-01 0.730 0.465701
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes 1.220e+00 2.440e-01 5.000 6.26e-07
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners 1.182e-01 2.522e-01 0.469 0.639260
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit -8.497e-01 2.479e-01 -3.427 0.000623
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners -1.358e+00 2.756e-01 -4.928 9.02e-07
NameFarm Workforce Retention Credit -1.612e+00 3.394e-01 -4.749 2.20e-06
NameFarmers' School Tax Credit -1.311e+00 2.696e-01 -4.863 1.26e-06
NameHire a Veteran Credit -2.915e+00 4.706e-01 -6.195 7.15e-10
NameHistoric Properties Rehabilitation Credit 1.918e+00 2.786e-01 6.886 7.81e-12
NameIndustrial or Manufacturing Business Tax Credit -1.718e+00 2.836e-01 -6.059 1.66e-09
NameInvestment Tax Credit 7.147e-02 2.349e-01 0.304 0.760985
NameInvestment Tax Credit for the Financial Services Industry 3.176e-01 2.662e-01 1.193 0.232870
NameLife Sciences Research & Development Tax Credit 7.033e-01 5.543e-01 1.269 0.204728
NameLong-Term Care Insurance Credit -2.863e+00 2.590e-01 -11.051 < 2e-16
NameLow-Income Housing Credit -9.063e-01 2.940e-01 -3.083 0.002080
NameMinimum Wage Reimbursement Credit -1.241e+00 2.656e-01 -4.674 3.17e-06
NameMortgage Servicing Tax Credit -9.378e-01 2.865e-01 -3.273 0.001084
NameNew York Youth Jobs Program Tax Credit -1.330e+00 2.478e-01 -5.367 9.01e-08
NameQETC Capital Tax Credit 4.561e-01 3.661e-01 1.246 0.212956
NameQETC Employment Credit -5.888e-01 2.709e-01 -2.174 0.029855
NameQETC Facilities, Operations, and Training Credit 5.799e-01 5.065e-01 1.145 0.252462
NameReal Property Tax Relief Credit for Manufacturing -1.518e+00 2.550e-01 -5.956 3.09e-09
NameSpecial Additional Mortgage Recording Tax Credit -2.309e-01 2.441e-01 -0.946 0.344374
NameSTART-UP NY Tax Elimination Credit -2.193e+00 2.985e-01 -7.345 3.05e-13
Group1,000,000 - 24,999,999 1.090e+00 9.246e-02 11.785 < 2e-16
Group100,000 - 499,999 3.590e-01 9.451e-02 3.798 0.000150
Group100,000,000 - 499,999,999 1.685e+00 1.040e-01 16.202 < 2e-16
Group25,000,000 - 49,999,999 1.325e+00 1.115e-01 11.883 < 2e-16
Group50,000,000 - 99,999,999 1.473e+00 1.118e-01 13.172 < 2e-16
Group500,000 - 999,999 6.032e-01 1.025e-01 5.886 4.67e-09
Group500,000,000 - and over 2.332e+00 1.016e-01 22.953 < 2e-16
GroupZero or Net Loss 9.934e-01 8.817e-02 11.267 < 2e-16
Num -1.533e-03 2.263e-04 -6.775 1.66e-11
(Intercept) *
Year **
NameAlternative Fuels and Electric Vehicle Recharging Property Credit **
NameAlternative Minimum Tax Credit ***
NameBeer Production Credit .
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 ***
NameBrownfield Tax Credits - Remediation Real Property Tax Credit
NameClean Heating Fuel Credit ***
NameConservation Easement Tax Credit ***
NameCredit for Employment of Persons with Disabilities ***
NameCredit for Purchase of an Automated External Defibrillator ***
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities
NameEmpire State Apprentice Tax Credit ***
NameEmpire State Commercial Production Credit
NameEmpire State Film Post Production Credit ***
NameEmpire State Film Production Credit ***
NameEmpire State Musical and Theatrical Production Credit
NameExcelsior Jobs Program Credit .
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit ***
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes ***
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit ***
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners ***
NameFarm Workforce Retention Credit ***
NameFarmers' School Tax Credit ***
NameHire a Veteran Credit ***
NameHistoric Properties Rehabilitation Credit ***
NameIndustrial or Manufacturing Business Tax Credit ***
NameInvestment Tax Credit
NameInvestment Tax Credit for the Financial Services Industry
NameLife Sciences Research & Development Tax Credit
NameLong-Term Care Insurance Credit ***
NameLow-Income Housing Credit **
NameMinimum Wage Reimbursement Credit ***
NameMortgage Servicing Tax Credit **
NameNew York Youth Jobs Program Tax Credit ***
NameQETC Capital Tax Credit
NameQETC Employment Credit *
NameQETC Facilities, Operations, and Training Credit
NameReal Property Tax Relief Credit for Manufacturing ***
NameSpecial Additional Mortgage Recording Tax Credit
NameSTART-UP NY Tax Elimination Credit ***
Group1,000,000 - 24,999,999 ***
Group100,000 - 499,999 ***
Group100,000,000 - 499,999,999 ***
Group25,000,000 - 49,999,999 ***
Group50,000,000 - 99,999,999 ***
Group500,000 - 999,999 ***
Group500,000,000 - and over ***
GroupZero or Net Loss ***
Num ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.022 on 1867 degrees of freedom
Multiple R-squared: 0.7368, Adjusted R-squared: 0.7293
F-statistic: 98.61 on 53 and 1867 DF, p-value: < 2.2e-16
GVIF Df GVIF^(1/(2*Df))
Year 2.125869 1 1.458036
Name 3.521453 43 1.014746
Group 1.510763 8 1.026124
Num 1.645484 1 1.282764
#Industry
# Same diagnostics, now on the transformed industry-group data.
sat.model.summary(df = industry_cleaned_bc, field = sat.field.bc, sat.formula = sat.formula.bc)
Shapiro-Wilk normality test
data: df[[field]]
W = 0.9902, p-value = 5.826e-12
[1] "kurtosis"
[1] -0.4869026
Call:
lm(formula = sat.formula, data = df)
Residuals:
Min 1Q Median 3Q Max
-6.2888 -0.4697 0.0183 0.4928 3.9068
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -6.095e+01 1.107e+01 -5.505 4.09e-08
Year 3.440e-02 5.488e-03 6.269 4.30e-10
NameAlternative Fuels and Electric Vehicle Recharging Property Credit 2.665e-01 2.733e-01 0.975 0.329737
NameAlternative Minimum Tax Credit -2.463e+00 2.272e-01 -10.839 < 2e-16
NameBeer Production Credit 6.334e-01 3.381e-01 1.873 0.061126
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 2.198e+00 2.415e-01 9.100 < 2e-16
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 2.727e+00 3.930e-01 6.939 5.06e-12
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 1.586e+00 2.581e-01 6.148 9.17e-10
NameBrownfield Tax Credits - Remediation Real Property Tax Credit 9.695e-01 2.570e-01 3.772 0.000166
NameClean Heating Fuel Credit -2.544e+00 2.481e-01 -10.252 < 2e-16
NameConservation Easement Tax Credit -1.123e+00 2.538e-01 -4.423 1.02e-05
NameCredit for Employment of Persons with Disabilities -1.245e+00 2.737e-01 -4.549 5.67e-06
NameCredit for Purchase of an Automated External Defibrillator -1.535e+00 2.411e-01 -6.368 2.29e-10
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities -8.171e-02 4.474e-01 -0.183 0.855119
NameEmpire State Apprentice Tax Credit -8.683e-01 5.464e-01 -1.589 0.112125
NameEmpire State Commercial Production Credit 6.235e-01 3.333e-01 1.871 0.061481
NameEmpire State Film Post Production Credit 1.439e+00 2.821e-01 5.100 3.66e-07
NameEmpire State Film Production Credit 3.232e+00 2.584e-01 12.509 < 2e-16
NameEmpire State Musical and Theatrical Production Credit 1.012e+00 4.865e-01 2.080 0.037608
NameExcelsior Jobs Program Credit 1.643e+00 2.372e-01 6.929 5.42e-12
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit 1.310e+00 2.332e-01 5.617 2.17e-08
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit 7.127e-01 2.275e-01 3.133 0.001750
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes 1.707e+00 2.264e-01 7.540 6.61e-14
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners 9.777e-01 2.328e-01 4.200 2.77e-05
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit 1.609e-01 2.303e-01 0.699 0.484921
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners -5.241e-01 2.722e-01 -1.925 0.054320
NameFarm Workforce Retention Credit -8.623e-01 2.890e-01 -2.984 0.002876
NameFarmers' School Tax Credit -1.158e+00 2.771e-01 -4.179 3.03e-05
NameHire a Veteran Credit -1.040e+00 4.456e-01 -2.335 0.019638
NameHistoric Properties Rehabilitation Credit 2.397e+00 2.614e-01 9.172 < 2e-16
NameInvestment Tax Credit 6.981e-01 2.230e-01 3.131 0.001763
NameInvestment Tax Credit for the Financial Services Industry 1.560e+00 3.108e-01 5.019 5.59e-07
NameLife Sciences Research & Development Tax Credit 9.057e-01 4.864e-01 1.862 0.062697
NameLong-Term Care Insurance Credit -1.867e+00 2.268e-01 -8.231 3.01e-16
NameLow-Income Housing Credit 1.331e+00 2.967e-01 4.487 7.57e-06
NameMinimum Wage Reimbursement Credit -9.161e-01 2.366e-01 -3.872 0.000111
NameMortgage Servicing Tax Credit 8.932e-01 3.489e-01 2.560 0.010534
NameNew York Youth Jobs Program Tax Credit 1.263e-01 2.307e-01 0.548 0.584069
NameQETC Capital Tax Credit 1.162e+00 3.168e-01 3.667 0.000251
NameQETC Employment Credit -3.048e-01 2.411e-01 -1.264 0.206249
NameQETC Facilities, Operations, and Training Credit 1.147e+00 3.624e-01 3.165 0.001571
NameReal Property Tax Relief Credit for Manufacturing -6.559e-01 2.388e-01 -2.746 0.006077
NameSpecial Additional Mortgage Recording Tax Credit 7.505e-01 2.393e-01 3.136 0.001736
NameSTART-UP NY Tax Elimination Credit -1.834e+00 2.565e-01 -7.151 1.14e-12
GroupAdministrative and Support and Waste Management and Remediation Services 2.878e-01 1.360e-01 2.116 0.034417
GroupAdministrative/Support/Waste Management/Remediation Services 1.675e-01 1.511e-01 1.109 0.267612
GroupAgriculture, Forestry, Fishing and Hunting -1.132e-01 1.258e-01 -0.899 0.368486
GroupArts, Entertainment, and Recreation 5.595e-01 1.251e-01 4.473 8.09e-06
GroupConstruction -4.299e-02 1.172e-01 -0.367 0.713860
GroupEducational Services 2.253e-01 1.592e-01 1.415 0.157160
GroupFinance and Insurance 5.350e-01 1.099e-01 4.870 1.19e-06
GroupHealth Care and Social Assistance -6.777e-02 1.237e-01 -0.548 0.583720
GroupInformation 7.098e-01 1.176e-01 6.037 1.81e-09
GroupManagement of Companies and Enterprises 6.672e-01 1.052e-01 6.340 2.73e-10
GroupManufacturing 4.608e-01 1.095e-01 4.208 2.67e-05
GroupMining 2.515e-01 1.940e-01 1.296 0.194977
GroupMining, Quarrying, and Oil and Gas Extraction 3.869e-01 1.626e-01 2.379 0.017421
GroupOther Services (except Public Administration) -1.550e-01 1.197e-01 -1.295 0.195482
GroupProfessional, Scientific, and Technical Services 4.875e-01 1.126e-01 4.331 1.55e-05
GroupReal Estate and Rental and Leasing 1.461e-01 1.104e-01 1.324 0.185596
GroupRetail Trade 3.754e-01 1.100e-01 3.414 0.000650
GroupTransportation and Warehousing 2.125e-01 1.242e-01 1.711 0.087179
GroupUtilities 6.972e-01 1.422e-01 4.904 1.00e-06
GroupWholesale Trade 4.367e-01 1.124e-01 3.886 0.000105
Num 4.542e-04 2.307e-04 1.969 0.049082
(Intercept) ***
Year ***
NameAlternative Fuels and Electric Vehicle Recharging Property Credit
NameAlternative Minimum Tax Credit ***
NameBeer Production Credit .
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 ***
NameBrownfield Tax Credits - Remediation Real Property Tax Credit ***
NameClean Heating Fuel Credit ***
NameConservation Easement Tax Credit ***
NameCredit for Employment of Persons with Disabilities ***
NameCredit for Purchase of an Automated External Defibrillator ***
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities
NameEmpire State Apprentice Tax Credit
NameEmpire State Commercial Production Credit .
NameEmpire State Film Post Production Credit ***
NameEmpire State Film Production Credit ***
NameEmpire State Musical and Theatrical Production Credit *
NameExcelsior Jobs Program Credit ***
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit ***
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit **
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes ***
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners ***
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners .
NameFarm Workforce Retention Credit **
NameFarmers' School Tax Credit ***
NameHire a Veteran Credit *
NameHistoric Properties Rehabilitation Credit ***
NameInvestment Tax Credit **
NameInvestment Tax Credit for the Financial Services Industry ***
NameLife Sciences Research & Development Tax Credit .
NameLong-Term Care Insurance Credit ***
NameLow-Income Housing Credit ***
NameMinimum Wage Reimbursement Credit ***
NameMortgage Servicing Tax Credit *
NameNew York Youth Jobs Program Tax Credit
NameQETC Capital Tax Credit ***
NameQETC Employment Credit
NameQETC Facilities, Operations, and Training Credit **
NameReal Property Tax Relief Credit for Manufacturing **
NameSpecial Additional Mortgage Recording Tax Credit **
NameSTART-UP NY Tax Elimination Credit ***
GroupAdministrative and Support and Waste Management and Remediation Services *
GroupAdministrative/Support/Waste Management/Remediation Services
GroupAgriculture, Forestry, Fishing and Hunting
GroupArts, Entertainment, and Recreation ***
GroupConstruction
GroupEducational Services
GroupFinance and Insurance ***
GroupHealth Care and Social Assistance
GroupInformation ***
GroupManagement of Companies and Enterprises ***
GroupManufacturing ***
GroupMining
GroupMining, Quarrying, and Oil and Gas Extraction *
GroupOther Services (except Public Administration)
GroupProfessional, Scientific, and Technical Services ***
GroupReal Estate and Rental and Leasing
GroupRetail Trade ***
GroupTransportation and Warehousing .
GroupUtilities ***
GroupWholesale Trade ***
Num *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8703 on 2411 degrees of freedom
Multiple R-squared: 0.7631, Adjusted R-squared: 0.7569
F-statistic: 121.4 on 64 and 2411 DF, p-value: < 2.2e-16
GVIF Df GVIF^(1/(2*Df))
Year 2.190806 1 1.480137
Name 5.185764 42 1.019787
Group 2.724217 20 1.025371
Num 1.370920 1 1.170863
BIC comparison before and after BoxCox transform
# Raw BIC values. NOTE: the two models in each call have responses on
# different scales (Avg.bc vs Avg), so these raw numbers are not directly
# comparable; they are kept for reference only.
BIC(income.model.bc, income.model)
BIC(industry.model.bc, industry.model)

# BIC of a Box-Cox model expressed on the scale of the original response.
# The log-likelihood of y under the transformed model gains the Jacobian term
# (lambda - 1) * sum(log(y)), hence BIC changes by -2 times that amount.
bc_bic_adjusted <- function(model.bc, y, lambda.bc) {
  BIC(model.bc) - 2 * (lambda.bc - 1) * sum(log(y))
}

# Like-for-like comparison on the original response scale.
c(
  income.model.bc = bc_bic_adjusted(income.model.bc, income_cleaned$Avg, income.lambda.bc),
  income.model = BIC(income.model)
)
c(
  industry.model.bc = bc_bic_adjusted(industry.model.bc, industry_cleaned$Avg, industry.lambda.bc),
  industry.model = BIC(industry.model)
)
Stepwise Regression on the Box-Cox Transformed Datasets (income and industry groups)
#creating dummy variable columns for stepwise
# Expand `df` into a dummy-coded data frame suitable for step().
#
# Args:
#   df: data frame holding the response column `Avg.bc` plus the predictors.
# Returns:
#   A data.frame with one column per non-intercept design-matrix column,
#   followed by `Avg.bc`. In the column names every "-", "'", "/", space,
#   ",", "�" and "&" is replaced by "_".
dummy_func <- function (df){
  # Build the model frame once so that the design matrix and the response are
  # taken from exactly the same rows (the model frame may drop rows with NA).
  mf <- model.frame(Avg.bc ~ ., data = df)
  # Drop the intercept column; drop = FALSE keeps a matrix (and its column
  # name) even when a single design column remains.
  x <- model.matrix(attr(mf, "terms"), data = mf)[, -1, drop = FALSE]
  dummy_bc <- as.data.frame(x)
  dummy_bc$Avg.bc <- unname(model.response(mf))
  colnames(dummy_bc) <- gsub("-|'|/| |,|�|&", "_", colnames(dummy_bc))
  dummy_bc
}
Cleaning column names further so stepwise regression doesn’t present any errors
#Income Group Dataset
# Dummy-encode income_cleaned_bc with dummy_func().
income.dummy.bc <- dummy_func(income_cleaned_bc)
# Disabled manual rename of column 37 (kept for reference only).
#colnames(income.dummy.bc)[37] <- 'NameManufactureru0092s_Real_Property_Tax_Credit'
# Print the resulting column names so they can be inspected.
colnames(income.dummy.bc)
#Industry Group Dataset
# Dummy-encode industry_cleaned_bc with dummy_func().
industry.dummy.bc <- dummy_func(industry_cleaned_bc)
# Disabled manual rename of column 35 (kept for reference only).
#colnames(industry.dummy.bc)[35] <- 'NameManufactureru0092s_Real_Property_Tax_Credit'
# Print the resulting column names so they can be inspected.
colnames(industry.dummy.bc)
Stepwise regression using BIC as the criterion (the penalty k = log(n)).
#Creating Stepwise Models
# Runs forward and backward stepwise selection with a BIC penalty on both
# dummy-encoded datasets. Results are stored per dataset name in forwardBIC
# and backwardBIC.
bcs <- list(income = income.dummy.bc, industry = industry.dummy.bc)
# Upper end of the search scope: all variables.
model.fulls <- list(
  income = lm(Avg.bc ~ ., data = income.dummy.bc),
  industry = lm(Avg.bc ~ ., data = industry.dummy.bc)
)
# Lower end of the search scope: intercept only.
model.emptys <- list(
  income = lm(Avg.bc ~ 1, data = income.dummy.bc),
  industry = lm(Avg.bc ~ 1, data = industry.dummy.bc)
)
# Dataset keys iterated over (not to be confused with step()'s `k` penalty).
k <- c('income', 'industry')
forwardBIC <- list(income = NULL, industry = NULL)
backwardBIC <- list(income = NULL, industry = NULL)
for (i in k){
  bc <- bcs[[i]]
  scope <- list(lower = formula(model.emptys[[i]]), upper = formula(model.fulls[[i]]))
  # nrow() always gives a scalar row count. The previous
  # count() %>% dplyr::first() yields a one-row data frame with dplyr >= 1.1.0,
  # which makes log(n_obs) unusable as step()'s numeric penalty.
  n_obs <- nrow(bc)
  # k = log(n) turns step()'s AIC-style criterion into BIC.
  forwardBIC[[i]] <- step(model.emptys[[i]], scope, direction = "forward", k = log(n_obs))
  backwardBIC[[i]] <- step(model.fulls[[i]], scope, direction = "backward", k = log(n_obs))
}
Selecting Best Formula per Dataset from Stepwise Regressions
#anova(model2, model.saturated)
# Income data: take the formula stored in the call of the backward-selected model.
income.best.formula <- backwardBIC$income$call[[2]]
income.best.formula
# Industry data: take the formula stored in the call of the forward-selected model.
industry.best.formula <- forwardBIC$industry$call[[2]]
industry.best.formula
Checking the VIFs of the model selected by stepwise regression (BIC criterion, penalty k = log(n)).
#checking our selected model's predictor variable's VIFs
# backwardBIC is a list of fitted models keyed by dataset name, and vif()
# needs a single fitted model, so pass the selected income model explicitly.
vif(backwardBIC[['income']])
Year
1.661856
NameAlternative_Fuels_and_Electric_Vehicle_Recharging_Property_Credit
1.040066
NameAlternative_Minimum_Tax_Credit
1.355727
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___On_or_after_6_23_08_but_before_7_1_15
1.077139
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___On_or_after_7_1_15
1.032409
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___Prior_to_6_23_08
1.076724
NameClean_Heating_Fuel_Credit
1.086361
NameConservation_Easement_Tax_Credit
1.071241
NameCredit_for_Employment_of_Persons_with_Disabilities
1.050364
NameCredit_for_Purchase_of_an_Automated_External_Defibrillator
1.084252
NameEmpire_State_Apprentice_Tax_Credit
1.028286
NameEmpire_State_Film_Post_Production_Credit
1.067360
NameEmpire_State_Film_Production_Credit
1.079378
NameEZ_QEZE_Tax_Credits___EZ_Investment_Tax_Credit
1.203345
NameEZ_QEZE_Tax_Credits___QEZE_Credit_for_Real_Property_Taxes
1.110401
NameEZ_QEZE_Tax_Credits___QEZE_Tax_Reduction_Credit
1.094191
NameEZ_QEZE_Tax_Credits___QEZE_Tax_Reduction_Credit_For_Corporate_Partners
1.059267
NameFarm_Workforce_Retention_Credit
1.043336
NameFarmers__School_Tax_Credit
1.075009
NameHire_a_Veteran_Credit
1.016637
NameHistoric_Properties_Rehabilitation_Credit
1.066991
NameIndustrial_or_Manufacturing_Business_Tax_Credit
1.180919
NameLong_Term_Care_Insurance_Credit
1.085593
NameLow_Income_Housing_Credit
1.059690
NameManufactureru0092s_Real_Property_Tax_Credit
1.059324
NameManufacturer_s_Real_Property_Tax_Credit
1.051337
NameMinimum_Wage_Reimbursement_Credit
1.074669
NameMortgage_Servicing_Tax_Credit
1.068997
NameNew_York_Youth_Jobs_Program_Tax_Credit
1.109900
NameQETC_Employment_Credit
1.072612
NameSpecial_Additional_Mortgage_Recording_Tax_Credit
1.114775
NameSTART_UP_NY_Tax_Elimination_Credit
1.055977
ENI1_000_000___24_999_999
1.850053
ENI100_000___499_999
1.795816
ENI100_000_000___499_999_999
1.691776
ENI25_000_000___49_999_999
1.578815
ENI50_000_000___99_999_999
1.576866
ENI500_000___999_999
1.708105
ENI500_000_000___and_over
1.744570
ENIZero_or_Net_Loss
1.867768
Num
1.533625
Splitting data up into test data and training data (test data is for year 2019, training is the rest)
# Print the dimensions of the income train/test split object.
# NOTE(review): the recorded result is NULL, which dim() returns for any object
# without a dim attribute (e.g. a plain list). Presumably income.data.split is
# a list of train/test pieces and the components should be inspected
# individually - confirm against where it is created.
dim(income.data.split)
NULL
Lasso regression for comparison to backward stepwise
# For each dataset key in k: fit lasso models over a lambda grid, choose lambda
# by 10-fold cross-validation, print the coefficients at that lambda, and print
# the test-set MSE next to the MSE of the saturated linear model.
#create lambda grid (identical for every dataset, so it is built once)
lambda.grid <- 10^seq(2, -5, length.out = 100)
for (i in k){
  X.train <- xy.splits[[i]][['X.train']]
  y.train <- xy.splits[[i]][['y.train']]
  X.test <- xy.splits[[i]][['X.test']]
  y.test <- xy.splits[[i]][['y.test']]
  #create lasso models with lambda.grid
  lasso.models <- glmnet(X.train, y.train, alpha = 1, lambda = lambda.grid)
  #visualize coefficient shrinkage
  plot(lasso.models, xvar = "lambda", label = TRUE, main = paste("Lasso Regression:", i))
  #Cross Validation to find best lambda (seed reset so folds are reproducible per dataset)
  set.seed(0)
  cv.lasso.models <- cv.glmnet(X.train, y.train, alpha = 1, lambda = lambda.grid, nfolds = 10)
  #visualize cross validation for lambda that minimizes the mean squared error.
  plot(cv.lasso.models, main = paste("Lasso Regression:", i))
  #best lambda = the grid value with the lowest cross-validated error
  best.lambda <- cv.lasso.models$lambda.min
  print(paste(i, ' best.lambda:', best.lambda))
  #looking at the lasso coefficients for the best.lambda
  best.lambda.coeff <- predict(lasso.models, s = best.lambda, type = "coefficients")
  print(best.lambda.coeff)
  #predictions for the TEST data at best.lambda; the variable name says
  #"train" but is kept because code after the loop reads it
  lasso.best.lambda.train.pred <- predict(lasso.models, s = best.lambda, newx = X.test)
  #checking MSE on the test data
  MSE.lasso <- mean((lasso.best.lambda.train.pred - y.test)^2)
  #saturated linear model fit on the training split, scored on the test split
  sat.data.split <- test_train_split(xy.sat.splits[[i]], Avg.bc ~ .)
  sat.model.bc <- lm(Avg.bc ~., data = as.data.frame(cbind(sat.data.split[['X.train']], sat.data.split[['y.train']])))
  MSE.sat <- calc_MSE(sat.model.bc, as.data.frame(sat.data.split[['X.test']]), sat.data.split[['y.test']])
  print(paste(i, ' Lasso MSE: ', MSE.lasso, ' Saturated MSE: ', MSE.sat))
}
[1] "income best.lambda: 0.000359381366380463"
42 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) -17.284028557
Year 0.013476737
NameAlternative_Fuels_and_Electric_Vehicle_Recharging_Property_Credit -1.311613095
NameAlternative_Minimum_Tax_Credit -2.231733278
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___On_or_after_6_23_08_but_before_7_1_15 1.563930684
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___On_or_after_7_1_15 2.495348478
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___Prior_to_6_23_08 1.252395863
NameClean_Heating_Fuel_Credit -3.196327520
NameConservation_Easement_Tax_Credit -2.276656433
NameCredit_for_Employment_of_Persons_with_Disabilities -3.290414339
NameCredit_for_Purchase_of_an_Automated_External_Defibrillator -3.159453144
NameEmpire_State_Apprentice_Tax_Credit -3.376054527
NameEmpire_State_Film_Post_Production_Credit 0.932428124
NameEmpire_State_Film_Production_Credit 2.721680583
NameEZ_QEZE_Tax_Credits___EZ_Investment_Tax_Credit 0.574494696
NameEZ_QEZE_Tax_Credits___QEZE_Credit_for_Real_Property_Taxes 0.960915301
NameEZ_QEZE_Tax_Credits___QEZE_Tax_Reduction_Credit -1.075189828
NameEZ_QEZE_Tax_Credits___QEZE_Tax_Reduction_Credit_For_Corporate_Partners -1.501009194
NameFarm_Workforce_Retention_Credit -1.861878308
NameFarmers__School_Tax_Credit -1.462208940
NameHire_a_Veteran_Credit -3.075132150
NameHistoric_Properties_Rehabilitation_Credit 1.795906161
NameIndustrial_or_Manufacturing_Business_Tax_Credit -1.970508839
NameLong_Term_Care_Insurance_Credit -2.991508779
NameLow_Income_Housing_Credit -1.276270928
NameManufactureru0092s_Real_Property_Tax_Credit -1.538480115
NameManufacturer_s_Real_Property_Tax_Credit -1.815450711
NameMinimum_Wage_Reimbursement_Credit -1.376381572
NameMortgage_Servicing_Tax_Credit -0.965444029
NameNew_York_Youth_Jobs_Program_Tax_Credit -1.590640664
NameQETC_Employment_Credit -0.853986295
NameSpecial_Additional_Mortgage_Recording_Tax_Credit -0.372417089
NameSTART_UP_NY_Tax_Elimination_Credit -2.300657711
Group1_000_000___24_999_999 1.151959932
Group100_000___499_999 0.353836083
Group100_000_000___499_999_999 1.726777151
Group25_000_000___49_999_999 1.377240385
Group50_000_000___99_999_999 1.526670338
Group500_000___999_999 0.591982587
Group500_000_000___and_over 2.412492613
GroupZero_or_Net_Loss 1.042816832
Num -0.001613791
[1] "income Lasso MSE: 1.45647173028653 Saturated MSE: 1.47368550172763"
[1] "industry best.lambda: 0.000114975699539774"
43 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) -61.87390882
NameAlternative_Minimum_Tax_Credit -3.17738814
NameLong_Term_Care_Insurance_Credit -2.61687663
NameClean_Heating_Fuel_Credit -3.20715109
NameEmpire_State_Film_Production_Credit 2.50121671
NameCredit_for_Purchase_of_an_Automated_External_Defibrillator -2.32395331
NameSTART_UP_NY_Tax_Elimination_Credit -2.73114539
NameMinimum_Wage_Reimbursement_Credit -1.67875085
NameEZ_QEZE_Tax_Credits___QEZE_Credit_for_Real_Property_Taxes 0.91834251
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___On_or_after_6_23_08_but_before_7_1_15 1.63074712
NameHistoric_Properties_Rehabilitation_Credit 1.70377612
NameExcelsior_Jobs_Program_Credit 0.98122860
NameConservation_Easement_Tax_Credit -1.84268727
NameCredit_for_Employment_of_Persons_with_Disabilities -2.02070562
NameFarmers__School_Tax_Credit -1.76586266
NameEZ_QEZE_Tax_Credits___EZ_Investment_Tax_Credit 0.54112912
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___Prior_to_6_23_08 0.78546985
NameManufactureru0092s_Real_Property_Tax_Credit -1.32671371
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___On_or_after_7_1_15 2.14607941
`GroupOther_Services_(except_Public_Administration)` -0.39320233
NameEmpire_State_Film_Post_Production_Credit 0.73195196
GroupManagement_of_Companies_and_Enterprises 0.47699822
NameFarm_Workforce_Retention_Credit -1.68344698
NameManufacturer_s_Real_Property_Tax_Credit -1.55252782
NameQETC_Employment_Credit -1.06268465
NameEZ_QEZE_Tax_Credits___QEZE_Tax_Reduction_Credit_For_Corporate_Partners -1.31500410
Year 0.03534486
NameNew_York_Youth_Jobs_Program_Tax_Credit -0.75302790
NameEZ_QEZE_Tax_Credits___QEZE_Tax_Reduction_Credit -0.63972813
GroupAgriculture__Forestry__Fishing_and_Hunting -0.34353275
GroupConstruction -0.29738810
GroupHealth_Care_and_Social_Assistance -0.29780385
NameHire_a_Veteran_Credit -1.74532218
GroupInformation 0.47997729
NameInvestment_Tax_Credit_for_the_Financial_Services_Industry 0.77828597
GroupFinance_and_Insurance 0.33842083
GroupUtilities 0.48589130
NameEmpire_State_Apprentice_Tax_Credit -2.52886106
NameAlternative_Fuels_and_Electric_Vehicle_Recharging_Property_Credit -0.62586141
GroupManufacturing 0.29977977
GroupProfessional__Scientific__and_Technical_Services 0.27403574
GroupArts__Entertainment__and_Recreation 0.36963834
GroupWholesale_Trade 0.23629280
[1] "industry Lasso MSE: 0.787120640328352 Saturated MSE: 0.757030931332998"
# Show the lasso predictions next to their value after applying
# (y * lambda + 1)^(1 / lambda), the inverse of the Box-Cox transform.
# After the loop over k, lasso.best.lambda.train.pred holds the predictions
# from the last dataset processed.
# NOTE(review): lambda.bc is defined outside this section - confirm it is the
# Box-Cox lambda of that same dataset.
as.data.frame(lasso.best.lambda.train.pred) %>%
  mutate(
    Avg_in_dollars = (lasso.best.lambda.train.pred * lambda.bc + 1)^(1 / lambda.bc)
  )
# MSE of sat.model.bc on the income saturated test split.
# NOTE(review): after the loop over k, sat.model.bc holds the model from the
# last dataset processed ('industry'), while this call scores it on
# income.sat.data.split. Confirm sat.model.bc is the income fit when this runs.
calc_MSE(sat.model.bc, as.data.frame(income.sat.data.split[['X.test']]), income.sat.data.split[['y.test']])
[1] 1.473686